{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8bca080d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a88ce2c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Title</th>\n",
       "      <th>Artist</th>\n",
       "      <th>ConstituentID</th>\n",
       "      <th>ArtistBio</th>\n",
       "      <th>Nationality</th>\n",
       "      <th>BeginDate</th>\n",
       "      <th>EndDate</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Date</th>\n",
       "      <th>Medium</th>\n",
       "      <th>...</th>\n",
       "      <th>ThumbnailURL</th>\n",
       "      <th>Circumference (cm)</th>\n",
       "      <th>Depth (cm)</th>\n",
       "      <th>Diameter (cm)</th>\n",
       "      <th>Height (cm)</th>\n",
       "      <th>Length (cm)</th>\n",
       "      <th>Weight (kg)</th>\n",
       "      <th>Width (cm)</th>\n",
       "      <th>Seat Height (cm)</th>\n",
       "      <th>Duration (sec.)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ferdinandsbrücke Project, Vienna, Austria (Ele...</td>\n",
       "      <td>Otto Wagner</td>\n",
       "      <td>6210</td>\n",
       "      <td>(Austrian, 1841–1918)</td>\n",
       "      <td>(Austrian)</td>\n",
       "      <td>(1841)</td>\n",
       "      <td>(1918)</td>\n",
       "      <td>(Male)</td>\n",
       "      <td>1896</td>\n",
       "      <td>Ink and cut-and-pasted painted pages on paper</td>\n",
       "      <td>...</td>\n",
       "      <td>http://www.moma.org/media/W1siZiIsIjU5NDA1Il0s...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>48.6000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>168.9000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>City of Music, National Superior Conservatory ...</td>\n",
       "      <td>Christian de Portzamparc</td>\n",
       "      <td>7470</td>\n",
       "      <td>(French, born 1944)</td>\n",
       "      <td>(French)</td>\n",
       "      <td>(1944)</td>\n",
       "      <td>(0)</td>\n",
       "      <td>(Male)</td>\n",
       "      <td>1987</td>\n",
       "      <td>Paint and colored pencil on print</td>\n",
       "      <td>...</td>\n",
       "      <td>http://www.moma.org/media/W1siZiIsIjk3Il0sWyJw...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>40.6401</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>29.8451</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Villa near Vienna Project, Outside Vienna, Aus...</td>\n",
       "      <td>Emil Hoppe</td>\n",
       "      <td>7605</td>\n",
       "      <td>(Austrian, 1876–1957)</td>\n",
       "      <td>(Austrian)</td>\n",
       "      <td>(1876)</td>\n",
       "      <td>(1957)</td>\n",
       "      <td>(Male)</td>\n",
       "      <td>1903</td>\n",
       "      <td>Graphite, pen, color pencil, ink, and gouache ...</td>\n",
       "      <td>...</td>\n",
       "      <td>http://www.moma.org/media/W1siZiIsIjk4Il0sWyJw...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.3000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>31.8000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>The Manhattan Transcripts Project, New York, N...</td>\n",
       "      <td>Bernard Tschumi</td>\n",
       "      <td>7056</td>\n",
       "      <td>(French and Swiss, born Switzerland 1944)</td>\n",
       "      <td>()</td>\n",
       "      <td>(1944)</td>\n",
       "      <td>(0)</td>\n",
       "      <td>(Male)</td>\n",
       "      <td>1980</td>\n",
       "      <td>Photographic reproduction with colored synthet...</td>\n",
       "      <td>...</td>\n",
       "      <td>http://www.moma.org/media/W1siZiIsIjEyNCJdLFsi...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>50.8000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>50.8000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Villa, project, outside Vienna, Austria, Exter...</td>\n",
       "      <td>Emil Hoppe</td>\n",
       "      <td>7605</td>\n",
       "      <td>(Austrian, 1876–1957)</td>\n",
       "      <td>(Austrian)</td>\n",
       "      <td>(1876)</td>\n",
       "      <td>(1957)</td>\n",
       "      <td>(Male)</td>\n",
       "      <td>1903</td>\n",
       "      <td>Graphite, color pencil, ink, and gouache on tr...</td>\n",
       "      <td>...</td>\n",
       "      <td>http://www.moma.org/media/W1siZiIsIjEyNiJdLFsi...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>38.4000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>19.1000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               Title  \\\n",
       "0  Ferdinandsbrücke Project, Vienna, Austria (Ele...   \n",
       "1  City of Music, National Superior Conservatory ...   \n",
       "2  Villa near Vienna Project, Outside Vienna, Aus...   \n",
       "3  The Manhattan Transcripts Project, New York, N...   \n",
       "4  Villa, project, outside Vienna, Austria, Exter...   \n",
       "\n",
       "                     Artist ConstituentID  \\\n",
       "0               Otto Wagner          6210   \n",
       "1  Christian de Portzamparc          7470   \n",
       "2                Emil Hoppe          7605   \n",
       "3           Bernard Tschumi          7056   \n",
       "4                Emil Hoppe          7605   \n",
       "\n",
       "                                   ArtistBio Nationality BeginDate EndDate  \\\n",
       "0                      (Austrian, 1841–1918)  (Austrian)    (1841)  (1918)   \n",
       "1                        (French, born 1944)    (French)    (1944)     (0)   \n",
       "2                      (Austrian, 1876–1957)  (Austrian)    (1876)  (1957)   \n",
       "3  (French and Swiss, born Switzerland 1944)          ()    (1944)     (0)   \n",
       "4                      (Austrian, 1876–1957)  (Austrian)    (1876)  (1957)   \n",
       "\n",
       "   Gender  Date                                             Medium  ...  \\\n",
       "0  (Male)  1896      Ink and cut-and-pasted painted pages on paper  ...   \n",
       "1  (Male)  1987                  Paint and colored pencil on print  ...   \n",
       "2  (Male)  1903  Graphite, pen, color pencil, ink, and gouache ...  ...   \n",
       "3  (Male)  1980  Photographic reproduction with colored synthet...  ...   \n",
       "4  (Male)  1903  Graphite, color pencil, ink, and gouache on tr...  ...   \n",
       "\n",
       "                                        ThumbnailURL Circumference (cm)  \\\n",
       "0  http://www.moma.org/media/W1siZiIsIjU5NDA1Il0s...                NaN   \n",
       "1  http://www.moma.org/media/W1siZiIsIjk3Il0sWyJw...                NaN   \n",
       "2  http://www.moma.org/media/W1siZiIsIjk4Il0sWyJw...                NaN   \n",
       "3  http://www.moma.org/media/W1siZiIsIjEyNCJdLFsi...                NaN   \n",
       "4  http://www.moma.org/media/W1siZiIsIjEyNiJdLFsi...                NaN   \n",
       "\n",
       "  Depth (cm) Diameter (cm) Height (cm) Length (cm) Weight (kg)  Width (cm)  \\\n",
       "0        NaN           NaN     48.6000         NaN         NaN    168.9000   \n",
       "1        NaN           NaN     40.6401         NaN         NaN     29.8451   \n",
       "2        NaN           NaN     34.3000         NaN         NaN     31.8000   \n",
       "3        NaN           NaN     50.8000         NaN         NaN     50.8000   \n",
       "4        NaN           NaN     38.4000         NaN         NaN     19.1000   \n",
       "\n",
       "  Seat Height (cm) Duration (sec.)  \n",
       "0              NaN             NaN  \n",
       "1              NaN             NaN  \n",
       "2              NaN             NaN  \n",
       "3              NaN             NaN  \n",
       "4              NaN             NaN  \n",
       "\n",
       "[5 rows x 29 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('./data/Artworks.csv').head(100)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "114e7b6c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1976-77    25\n",
       "1980-81    15\n",
       "1979       12\n",
       "Unknown     7\n",
       "1980        5\n",
       "1978        5\n",
       "1917        5\n",
       "1923        4\n",
       "1935        3\n",
       "1903        2\n",
       "1987        2\n",
       "1974        1\n",
       "1936        1\n",
       "1930        1\n",
       "c. 1917     1\n",
       "n.d.        1\n",
       "1896        1\n",
       "1986        1\n",
       "1984        1\n",
       "1975        1\n",
       "1918        1\n",
       "1906        1\n",
       "1905        1\n",
       "1900        1\n",
       "1968        1\n",
       "1970        1\n",
       "Name: Date, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Date'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "40212e7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将范围日期清洗为开始日期，以‘-’为split切分\n",
    "row_with_dashes = df['Date'].str.contains('-').fillna(False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b12a5a2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将结果的第一部分作为处理的最终结果\n",
    "for i, dash in df[row_with_dashes].iterrows():\n",
    "    df.at[i,'Date'] = dash['Date'][0:4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "0aca8a9f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1976       25\n",
       "1980       20\n",
       "1979       12\n",
       "Unknown     7\n",
       "1978        5\n",
       "1917        5\n",
       "1923        4\n",
       "1935        3\n",
       "1903        2\n",
       "1987        2\n",
       "1974        1\n",
       "1936        1\n",
       "1930        1\n",
       "c. 1917     1\n",
       "n.d.        1\n",
       "1896        1\n",
       "1986        1\n",
       "1984        1\n",
       "1975        1\n",
       "1918        1\n",
       "1906        1\n",
       "1905        1\n",
       "1900        1\n",
       "1968        1\n",
       "1970        1\n",
       "Name: Date, dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Date'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "e7b99648",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 把包含‘c’的日期过滤出来\n",
    "row_with_cs = df['Date'].str.contains('c').fillna(False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "412f8c98",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 对过滤出的数据，取后四位作为最终日期数据\n",
    "for i,row in df[row_with_cs].iterrows():\n",
    "    df.at[i,'Date'] = row['Date'][-4:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "2879a4db",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Title</th>\n",
       "      <th>Artist</th>\n",
       "      <th>ConstituentID</th>\n",
       "      <th>ArtistBio</th>\n",
       "      <th>Nationality</th>\n",
       "      <th>BeginDate</th>\n",
       "      <th>EndDate</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Date</th>\n",
       "      <th>Medium</th>\n",
       "      <th>...</th>\n",
       "      <th>ThumbnailURL</th>\n",
       "      <th>Circumference (cm)</th>\n",
       "      <th>Depth (cm)</th>\n",
       "      <th>Diameter (cm)</th>\n",
       "      <th>Height (cm)</th>\n",
       "      <th>Length (cm)</th>\n",
       "      <th>Weight (kg)</th>\n",
       "      <th>Width (cm)</th>\n",
       "      <th>Seat Height (cm)</th>\n",
       "      <th>Duration (sec.)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>78</th>\n",
       "      <td>Villa Snellman, Djursholm, Sweden, Elevation o...</td>\n",
       "      <td>Erik Gunnar Asplund</td>\n",
       "      <td>27</td>\n",
       "      <td>(Swedish, 1885–1940)</td>\n",
       "      <td>(Swedish)</td>\n",
       "      <td>(1885)</td>\n",
       "      <td>(1940)</td>\n",
       "      <td>(Male)</td>\n",
       "      <td>1917</td>\n",
       "      <td>Graphite and crayon on tracing paper mounted o...</td>\n",
       "      <td>...</td>\n",
       "      <td>http://www.moma.org/media/W1siZiIsIjc3OSJdLFsi...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>23.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                Title               Artist  \\\n",
       "78  Villa Snellman, Djursholm, Sweden, Elevation o...  Erik Gunnar Asplund   \n",
       "\n",
       "   ConstituentID             ArtistBio Nationality BeginDate EndDate  Gender  \\\n",
       "78            27  (Swedish, 1885–1940)   (Swedish)    (1885)  (1940)  (Male)   \n",
       "\n",
       "    Date                                             Medium  ...  \\\n",
       "78  1917  Graphite and crayon on tracing paper mounted o...  ...   \n",
       "\n",
       "                                         ThumbnailURL Circumference (cm)  \\\n",
       "78  http://www.moma.org/media/W1siZiIsIjc3OSJdLFsi...                NaN   \n",
       "\n",
       "   Depth (cm) Diameter (cm) Height (cm) Length (cm) Weight (kg)  Width (cm)  \\\n",
       "78        NaN           NaN        23.2         NaN         NaN        34.9   \n",
       "\n",
       "   Seat Height (cm) Duration (sec.)  \n",
       "78              NaN             NaN  \n",
       "\n",
       "[1 rows x 29 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[row_with_cs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "c5d87b05",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1976    25\n",
       "1980    20\n",
       "1979    12\n",
       "0        8\n",
       "1917     6\n",
       "1978     5\n",
       "1923     4\n",
       "1935     3\n",
       "1903     2\n",
       "1987     2\n",
       "1986     1\n",
       "1936     1\n",
       "1930     1\n",
       "1974     1\n",
       "1896     1\n",
       "1984     1\n",
       "1975     1\n",
       "1970     1\n",
       "1906     1\n",
       "1905     1\n",
       "1900     1\n",
       "1968     1\n",
       "1918     1\n",
       "Name: Date, dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Date'] = df['Date'].replace('Unknown','0',regex=True)\n",
    "df['Date'] = df['Date'].replace('n.d.','0',regex=True)\n",
    "df['Date'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f3af8d42",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
